Part 1: PCA With Penguins

# Run a principal component analysis (PCA) on the four penguin size
# measurements: body mass plus every column whose name ends in "_mm".
penguin_pca <- penguins %>%
  select(body_mass_g, ends_with("_mm")) %>%
  # Listwise deletion: drop every observation with an NA in any kept column.
  drop_na() %>%
  # Standardize each column (centre to mean 0, scale to SD 1) so that
  # variables measured in different units contribute comparably.
  scale() %>%
  # Compute the principal components of the standardized matrix.
  prcomp()

# View the loadings: the weight of each original variable on each PC.
penguin_pca$rotation
##                          PC1         PC2        PC3        PC4
## body_mass_g        0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm     0.4552503 0.597031143  0.6443012 -0.1455231
## bill_depth_mm     -0.4003347 0.797766572 -0.4184272  0.1679860
## flipper_length_mm  0.5760133 0.002282201 -0.2320840  0.7837987
# Keep every column of the full dataset (e.g. species, used to colour the PCA
# plot), but drop the rows with an NA in any of the PCA variables. The
# selector matches the one used to build penguin_pca, so both objects are
# based on exactly the same observations.
penguin_complete <- penguins %>%
  drop_na(body_mass_g, ends_with("_mm"))

# Build a ready-made PCA biplot: autoplot() creates a complete ggplot
# appropriate to the type of object it is given.
autoplot(
  penguin_pca,
  data = penguin_complete,
  colour = "species",      # To colour each point by its species.
  loadings = TRUE,         # To draw an arrow for each variable's loadings.
  loadings.label = TRUE    # To label each arrow with its variable name.
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Part 2: ‘ggplot2’ Customization and Reading in Different File Types

Read in an .xlsx file and do some wrangling

# Read the landings workbook from the project's data folder and tidy it.
fish_noaa <- read_excel(here("data", "foss_landings.xlsx")) %>%
  clean_names() %>%
  mutate(
    # To convert the text in every character column to lower case.
    across(where(is.character), tolower),
    # To keep each name only up to its fourth-from-last character, i.e. to
    # drop the final three characters.
    nmfs_name = str_sub(nmfs_name, end = -4)
  ) %>%
  # To keep only the rows whose confidentiality value is "public".
  filter(confidentiality == "public")

Make a customized graph

# To store a line graph of pounds against year, with one coloured line per
# nmfs_name. The legend is switched off: with this many species, leaving it
# on results in the legend being shown instead of the lines.
fish_plot <- ggplot(data = fish_noaa, aes(x = year, y = pounds)) +
  geom_line(aes(color = nmfs_name), show.legend = FALSE) +
  theme_minimal()

# To show the stored graph.
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

ggplotly(fish_plot) # To convert the stored ggplot into an interactive (plotly) graph.
# To draw one line per nmfs_name and highlight only the "tunas" series;
# gghighlight() de-emphasizes every series that does not meet its condition.
ggplot(data = fish_noaa, aes(x = year, y = pounds, group = nmfs_name)) +
  geom_line() +
  theme_minimal() +
  gghighlight(nmfs_name == "tunas")
## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## label_key: nmfs_name
## Warning: Removed 6 row(s) containing missing values (geom_path).

# To draw one coloured line per nmfs_name and highlight every series whose
# maximum pounds value exceeds 1e8 (100 million).
ggplot(data = fish_noaa, aes(x = year, y = pounds, group = nmfs_name)) +
  geom_line(aes(colour = nmfs_name)) +
  theme_minimal() +
  # NOTE(review): max() returns NA for a series containing a missing pounds
  # value, so such a series would presumably not be highlighted - confirm
  # whether na.rm = TRUE is wanted here.
  gghighlight(max(pounds) > 1e8)
## label_key: nmfs_name
## Warning: Removed 6 row(s) containing missing values (geom_path).

Read in data from a URL, use ‘lubridate’ and ‘mutate()’, and make a graph with months in chronological order

# To read a CSV file directly from a URL. clean_names() then converts the
# column names to snake_case (e.g. total_kWh becomes total_k_wh).
monroe_wt <- read_csv("https://data.bloomington.in.gov/dataset/2c81cfe3-62c2-46ed-8fcf-83c1880301d1/resource/13c8f7aa-af51-4008-80a9-56415c7c931e/download/mwtpdailyelectricitybclear.csv") %>% 
  clean_names()
## Parsed with column specification:
## cols(
##   date = col_character(),
##   kWh1 = col_double(),
##   kW1 = col_double(),
##   kWh2 = col_double(),
##   kW2 = col_double(),
##   solar_kWh = col_double(),
##   total_kWh = col_double(),
##   MG = col_double()
## )
# Add date-derived columns used to plot the records by month.
monroe_ts <- monroe_wt %>%
  mutate(
    # To parse the month/day/year text into a proper date.
    date = mdy(date),
    # To extract the month number from each date.
    record_month = month(date),
    # To look up the abbreviation for each month number, then order the
    # factor levels by month number so that a plot axis runs Jan - Dec
    # (chronological) rather than alphabetically.
    month_name = fct_reorder(month.abb[record_month], record_month)
  )

# To plot total_k_wh for every record against its month; jittering spreads
# out points that would otherwise be drawn on top of each other.
ggplot(data = monroe_ts, aes(x = month_name, y = total_k_wh)) +
  geom_jitter()